#This is to get:
#FigureA7a
#FigureA7b
#FigureA7c
#FigureA7d

#FigureA8a
#FigureA8b
#FigureA8c
#FigureA8d
#FigureA8e

#FigureA9

#FigureA10

rm(list = ls())
###########
#Libraries#
###########
library("ggplot2")
library("glue")
library('gridExtra')
library('grid')
library('lfe')
library("rddtools")

setwd("/Users/bgpopescu/Dropbox/Legacies_Central_Europe/")
setwd("C:/Users/bogdanp/Dropbox/Legacies_Central_Europe/")
##########
#Read CSV#
##########
data_1930 <- read.csv(file = './data/data_ro_1930.csv')


###################
#Defining Distance#
###################


data_1930$dist1 <- as.numeric( with (data_1930,ifelse(data_1930$treat==1, 1, -1)))
data_1930$dist2 <-data_1930$dist1*data_1930$Ott_Habs_brd_distance


unique(data_1930$Ott_Habs_brd_NEAR_FID)


data_1930$bfe1<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==1, 1, 0)
data_1930$bfe2<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==9, 1, 0)
data_1930$bfe3<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==10, 1, 0)
data_1930$bfe4<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==11, 1, 0)
data_1930$bfe5<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==12, 1, 0)
data_1930$bfe6<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==13, 1, 0)
data_1930$bfe7<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==14, 1, 0)
data_1930$bfe8<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==15, 1, 0)

#data<-subset(data, pct_romanian>70)
####################
# Helper functions #
####################
comp_dist <- function(data_sample, y, x, spec, title_spec) {
  # Takes:
  #   - data_sample, a dataframe with the relevant data
  #   - y, a string with my dependent variable
  #   - x, a list of strings with my independent variables
  #   - spec, model specification
  #   - title_spec, label for the model specification (to appear on
  #                 the title of the graph)
  # Returns:
  #   Nothing, creates one picture in the folder. This function
  #   compares bandwidths in increments of 1 Km for the outcomes
  #   of interest (y) and the specified model. 
  
  # Define range of potential bandwidths
  sequence = seq(from = 35, to = 120, by = 1)
  
  #Create empty variables
  confint1 = rep(NA, length(sequence))
  confint2 = rep(NA, length(sequence))
  confint3 = rep(NA, length(sequence))
  confint4 = rep(NA, length(sequence))
  coef1 = rep(NA, length(sequence))
  
  
  # This loop calculates local linear estimates
  # for bandwidths from 15 to 150 kilometers
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(x, collapse = "+")))
  
  for (i in 1 : length(sequence)){
    band = data_sample[which(data_sample$dist2 > -sequence[i] & 
                               data_sample$dist2 < sequence[i]),]
    model1 <- felm(specification, paste(glue("|0|0|0")), data = band)
    confint1[i] = confint(model1)[2,1]
    confint2[i] = confint(model1)[2,2]
    confint3[i] = confint(model1, level = 0.90)[2,1]
    confint4[i] = confint(model1, level = 0.90)[2,2]  
    coef1[i] = coef(model1)[2]
  }
  
  df <- data.frame(sequence = sequence, coef1 = coef1, 
                   confint1 = confint1, confint2 = confint2,
                   confint3 = confint3, confint4 = confint4)
  
  mean_feature <- ggplot(data = df, aes(sequence, coef1)) +
    geom_point(size = 1.3) +
    geom_errorbar(aes(ymin = confint1, ymax = confint2), size = 0.2, width = 0)+
    geom_errorbar(aes(ymin = confint3, ymax = confint4), size = 0.5, width = 0)+
    geom_hline(yintercept = 0) +
    theme_bw() +
    scale_x_continuous(name = "Bandwidth (km)", breaks=seq(35,120,10)) +
    scale_y_continuous(name = "Estimate") +
    ggtitle(glue("{title_spec}")) +
    theme(plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size=14),
          axis.text.y = element_text(size=14),
          axis.title=element_text(size=16))+
    annotate(geom = "text", x=-Inf, y=Inf, label = c(paste(" Habsburg N:", nrow(data_sample[data_sample$treat==0,]),"\n", 
                                                       "Ottoman N:", nrow(data_sample[data_sample$treat==1,]))), hjust = 0, vjust = 1)+
    annotate(geom = "text", x=Inf, y=Inf, label = c(paste("Mean:", round(mean(data_sample[[y]], na.rm=TRUE),2),"\n", 
                                                           "SD:", round(sd(data_sample[[y]], na.rm=TRUE),2))), hjust = "top", vjust = "right")
  
  return(mean_feature)
}


create_graphs_per_specifications <- function(regressors, specifications, 
                                             specification_label, data_sample, data_sample_name){
  # Takes:
  #   - regressors, a list of dependent variables
  #   - specifications, lists of model specifications (indep variables)
  #   - specification_label, a list of labels for model specifications
  #   - data_sample, the dataset of interest
  #   - data_sample_name, label for the data_sample
  
  # Returns:
  #   Creates one grid of pictures in the folder. This function
  #   allows us to vary the regressors.
  
  for (y in regressors) {
    regressor_name  = y[[1]]
    regressor_label = y[[2]]
    figure_name = y[[3]]
    
    print(glue("Using dependent variable {regressor_label}.."))
    graphs <- list()
    for (spec in names(specifications)){
      print(spec)
      print(specification_label[[spec]])
      temp_graph <- comp_dist(data_sample, regressor_name, 
                              specifications[[spec]], spec, 
                              specification_label[[spec]])
      graphs[[paste(c(spec, regressor_name), collapse = '_')]] <- temp_graph
    }
    
    #Ncol may need to be changed
    grid <- grid.arrange(grobs = graphs, nrow = 1, ncol = 1,
                         top = textGrob({},
                                        gp = gpar(fontsize = 20, font = 2)))
    ggsave(grid, file = glue("figureA{data_sample_name}{figure_name}.jpg"), 
           height = 8, width = 15, 
           units = "cm", dpi = 300)
  }
  
}



########
#MODELS#
########


# Define model specifications
specifications <- list(c('treat', 'POINT_X', 'POINT_Y', 'Ott_Habs_brd_distance', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7'))


names(specifications) <- c("meanbfeXYdist")

specification_label<-c("Treat + Lat-Lon + Dist. Brd + BFE")

names(specification_label)<-c("meanbfeXYdist")


regressors<-list(c("elev", "elev", "a"),
                 c("slope", "slope", "b"),
                 c("prec_mean", "prec_mean", "c"),
                 c("tavg_mean", "tavg_mean", "d"))


shortcut = glue("./Paper/graphs/")
setwd(shortcut)

create_graphs_per_specifications(regressors, specifications, 
                                 specification_label, data_1930, '7')



regressors<-list(c("wheat_rainfed", "wheat_rainfed", "a"),
                 c("sea_lines_distance", "sea_lines_distance", "b"),
                 c("large_rivers_distance", "large_rivers_distance", "c"))


shortcut = glue("./Paper/graphs/")
setwd(shortcut)

create_graphs_per_specifications(regressors, specifications, 
                                 specification_label, data_1930, '8')



regressors<-list(c("pct_flotant", "pct_flotant", "a"),
                 c("pct_romanian", "pct_romanian", "b"),
                 c("pct_hungarian", "pct_hungarian", "c"),
                 c("pct_german", "pct_german", "d"))

shortcut = glue("./Paper/graphs/")
setwd(shortcut)

create_graphs_per_specifications(regressors, specifications, 
                                 specification_label, data_1930, '9')


regressors<-list(c("route_density", "route_density", ""))


shortcut = glue("./Paper/graphs/")
setwd(shortcut)

create_graphs_per_specifications(regressors, specifications, 
                                 specification_label, data_1930, '10')


